/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * PrincipalComponents.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.matrix.EigenvalueDecomposition;
import weka.core.matrix.Matrix;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> Performs a principal components analysis and
 * transformation of the data.<br/>
 * Dimensionality reduction is accomplished by choosing enough eigenvectors to
 * account for some percentage of the variance in the original data -- default
 * 0.95 (95%).<br/>
 * Based on code of the attribute selection scheme 'PrincipalComponents' by Mark
 * Hall and Gabi Schmidberger.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -C
 *  Center (rather than standardize) the
 *  data and compute PCA using the covariance (rather
 *  than the correlation) matrix.
 * </pre>
 * 
 * <pre>
 * -R &lt;num&gt;
 *  Retain enough PC attributes to account
 *  for this proportion of variance in the original data.
 *  (default: 0.95)
 * </pre>
 * 
 * <pre>
 * -A &lt;num&gt;
 *  Maximum number of attributes to include in 
 *  transformed attribute names.
 *  (-1 = include all, default: 5)
 * </pre>
 * 
 * <pre>
 * -M &lt;num&gt;
 *  Maximum number of PC attributes to retain.
 *  (-1 = include all, default: -1)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz) -- attribute selection code
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) -- attribute selection code
 * @author fracpete (fracpete at waikato dot ac dot nz) -- filter code
 * @version $Revision: 6690 $
 */
public class PrincipalComponents extends Filter implements OptionHandler,
		UnsupervisedFilter {

	/** for serialization. */
	private static final long serialVersionUID = -5649876869480249303L;

	/**
	 * Working copy of the data to analyse/transform. Only populated while
	 * setup() runs; it is set back to null at the end of setup().
	 */
	protected Instances m_TrainInstances;

	/**
	 * Copy of the training data header, kept so that the class attribute (if
	 * set) and the relation name are available for the output format.
	 */
	protected Instances m_TrainCopy;

	/** The header for the transformed data format. */
	protected Instances m_TransformedFormat;

	/** Data has a class set. */
	protected boolean m_HasClass;

	/** Class index (in the data after the nominal-to-binary conversion). */
	protected int m_ClassIndex;

	/** Number of attributes (after preprocessing and attribute removal). */
	protected int m_NumAttribs;

	/** Number of instances. */
	protected int m_NumInstances;

	/**
	 * Correlation matrix for the original data. Holds the covariance matrix
	 * instead when the data is centered rather than standardized.
	 */
	protected double[][] m_Correlation;

	/**
	 * If true, center (rather than standardize) the data and compute PCA from
	 * covariance (rather than correlation) matrix.
	 */
	private boolean m_center = false;

	/**
	 * Will hold the unordered linear transformations of the (normalized)
	 * original data.
	 */
	protected double[][] m_Eigenvectors;

	/** Eigenvalues for the corresponding eigenvectors. */
	protected double[] m_Eigenvalues = null;

	/**
	 * Indices into m_Eigenvalues in the order returned by Utils.sort(); the
	 * components are read from the end of this array backwards.
	 */
	protected int[] m_SortedEigens;

	/** Sum of the eigenvalues. */
	protected double m_SumOfEigenValues = 0.0;

	/** Filter for replacing missing values. */
	protected ReplaceMissingValues m_ReplaceMissingFilter;

	/** Filter for turning nominal values into numeric ones. */
	protected NominalToBinary m_NominalToBinaryFilter;

	/**
	 * Filter for removing the class attribute and any attribute with at most
	 * one distinct value. Stays null when nothing has to be removed.
	 */
	protected Remove m_AttributeFilter;

	/** Filter for standardizing the data. */
	protected Standardize m_standardizeFilter;

	/** Filter for centering the data. */
	protected Center m_centerFilter;

	/**
	 * The number of attributes in the pc transformed data (including the
	 * class attribute, if there is one); -1 until the output format is known.
	 */
	protected int m_OutputNumAtts = -1;

	/**
	 * The amount of variance to cover in the original data when retaining the
	 * best n PC's.
	 */
	protected double m_CoverVariance = 0.95;

	/** Maximum number of attributes in the transformed attribute name. */
	protected int m_MaxAttrsInName = 5;

	/** Maximum number of attributes in the transformed data (-1 for all). */
	protected int m_MaxAttributes = -1;

	/**
	 * Provides a short, human-readable description of this filter.
	 *
	 * @return the description text, suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {
		StringBuilder info = new StringBuilder();

		info.append("Performs a principal components analysis and transformation of the data.\n");
		info.append("Dimensionality reduction is accomplished by choosing enough eigenvectors ");
		info.append("to account for some percentage of the variance in the original data -- ");
		info.append("default 0.95 (95%).\n");
		info.append("Based on code of the attribute selection scheme 'PrincipalComponents' ");
		info.append("by Mark Hall and Gabi Schmidberger.");

		return info.toString();
	}

	/**
	 * Lists the command-line options understood by this filter: -C, -R, -A
	 * and -M (in that order).
	 *
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions() {
		Vector<Option> options = new Vector<Option>();

		String centerText = "\tCenter (rather than standardize) the"
				+ "\n\tdata and compute PCA using the covariance (rather"
				+ "\n\t than the correlation) matrix.";
		options.add(new Option(centerText, "C", 0, "-C"));

		String varianceText = "\tRetain enough PC attributes to account\n"
				+ "\tfor this proportion of variance in the original data.\n"
				+ "\t(default: 0.95)";
		options.add(new Option(varianceText, "R", 1, "-R <num>"));

		String namesText = "\tMaximum number of attributes to include in \n"
				+ "\ttransformed attribute names.\n"
				+ "\t(-1 = include all, default: 5)";
		options.add(new Option(namesText, "A", 1, "-A <num>"));

		String maxText = "\tMaximum number of PC attributes to retain.\n"
				+ "\t(-1 = include all, default: -1)";
		options.add(new Option(maxText, "M", 1, "-M <num>"));

		return options.elements();
	}

	/**
	 * Configures the filter from a list of command-line options. Options that
	 * are absent are reset to their default value.
	 * <p/>
	 *
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 *
	 * <pre>
	 * -C
	 *  Center (rather than standardize) the
	 *  data and compute PCA using the covariance (rather
	 *  than the correlation) matrix.
	 * </pre>
	 *
	 * <pre>
	 * -R &lt;num&gt;
	 *  Retain enough PC attributes to account
	 *  for this proportion of variance in the original data.
	 *  (default: 0.95)
	 * </pre>
	 *
	 * <pre>
	 * -A &lt;num&gt;
	 *  Maximum number of attributes to include in
	 *  transformed attribute names.
	 *  (-1 = include all, default: 5)
	 * </pre>
	 *
	 * <pre>
	 * -M &lt;num&gt;
	 *  Maximum number of PC attributes to retain.
	 *  (-1 = include all, default: -1)
	 * </pre>
	 *
	 * <!-- options-end -->
	 *
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {
		// the options are read in the fixed order R, A, M, C

		String variance = Utils.getOption('R', options);
		setVarianceCovered((variance.length() == 0) ? 0.95 : Double
				.parseDouble(variance));

		String maxNames = Utils.getOption('A', options);
		setMaximumAttributeNames((maxNames.length() == 0) ? 5 : Integer
				.parseInt(maxNames));

		String maxAtts = Utils.getOption('M', options);
		setMaximumAttributes((maxAtts.length() == 0) ? -1 : Integer
				.parseInt(maxAtts));

		boolean center = Utils.getFlag('C', options);
		setCenterData(center);
	}

	/**
	 * Reports the filter's current configuration as command-line options. The
	 * -R, -A and -M options are always emitted; -C only when centering is
	 * switched on.
	 *
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String[] getOptions() {
		Vector<String> opts = new Vector<String>();

		opts.add("-R");
		opts.add(String.valueOf(getVarianceCovered()));

		opts.add("-A");
		opts.add(String.valueOf(getMaximumAttributeNames()));

		opts.add("-M");
		opts.add(String.valueOf(getMaximumAttributes()));

		if (getCenterData()) {
			opts.add("-C");
		}

		String[] asArray = new String[opts.size()];
		return opts.toArray(asArray);
	}

	/**
	 * Supplies the tool tip for the centerData property.
	 *
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String centerDataTipText() {
		final String tip = "Center (rather than standardize) the data. PCA will "
				+ "be computed from the covariance (rather than correlation) "
				+ "matrix";
		return tip;
	}

	/**
	 * Chooses between centering and standardizing the data. With centering
	 * switched on, PCA is computed from the covariance matrix instead of the
	 * correlation matrix.
	 *
	 * @param center
	 *            true if the data is to be centered rather than standardized
	 */
	public void setCenterData(boolean center) {
		this.m_center = center;
	}

	/**
	 * Tells whether the data is centered (PCA from the covariance matrix)
	 * rather than standardized (PCA from the correlation matrix).
	 *
	 * @return true if the data is to be centered rather than standardized.
	 */
	public boolean getCenterData() {
		return this.m_center;
	}

	/**
	 * Supplies the tool tip for the varianceCovered property.
	 *
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String varianceCoveredTipText() {
		final String tip = "Retain enough PC attributes to account for this proportion of variance.";
		return tip;
	}

	/**
	 * Stores the proportion of variance that the retained principal
	 * components have to account for.
	 *
	 * @param value
	 *            the proportion of total variance to account for
	 */
	public void setVarianceCovered(double value) {
		this.m_CoverVariance = value;
	}

	/**
	 * Reports the proportion of total variance that the retained principal
	 * components have to account for.
	 *
	 * @return the proportion of variance to account for
	 */
	public double getVarianceCovered() {
		return this.m_CoverVariance;
	}

	/**
	 * Supplies the tool tip for the maximumAttributeNames property.
	 *
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String maximumAttributeNamesTipText() {
		final String tip = "The maximum number of attributes to include in transformed attribute names.";
		return tip;
	}

	/**
	 * Stores how many original attributes may appear in the name of a
	 * transformed attribute.
	 *
	 * @param value
	 *            the maximum number of attributes
	 */
	public void setMaximumAttributeNames(int value) {
		this.m_MaxAttrsInName = value;
	}

	/**
	 * Reports how many original attributes may appear in the name of a
	 * transformed attribute.
	 *
	 * @return the maximum number of attributes
	 */
	public int getMaximumAttributeNames() {
		return this.m_MaxAttrsInName;
	}

	/**
	 * Supplies the tool tip for the maximumAttributes property.
	 *
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String maximumAttributesTipText() {
		final String tip = "The maximum number of PC attributes to retain.";
		return tip;
	}

	/**
	 * Stores the upper limit on the number of PC attributes to retain.
	 *
	 * @param value
	 *            the maximum number of attributes
	 */
	public void setMaximumAttributes(int value) {
		this.m_MaxAttributes = value;
	}

	/**
	 * Reports the upper limit on the number of PC attributes to retain.
	 *
	 * @return the maximum number of attributes
	 */
	public int getMaximumAttributes() {
		return this.m_MaxAttributes;
	}

	/**
	 * Describes the kind of data this filter accepts: nominal, numeric and
	 * date attributes (with missing values), plus nominal, numeric, date,
	 * missing or absent class values.
	 *
	 * @return the capabilities of this evaluator
	 * @see Capabilities
	 */
	public Capabilities getCapabilities() {
		// enabled in exactly this order: attribute capabilities first, then
		// the class capabilities
		final Capability[] supported = {
				Capability.NOMINAL_ATTRIBUTES,
				Capability.NUMERIC_ATTRIBUTES,
				Capability.DATE_ATTRIBUTES,
				Capability.MISSING_VALUES,
				Capability.NOMINAL_CLASS,
				Capability.NUMERIC_CLASS,
				Capability.DATE_CLASS,
				Capability.MISSING_CLASS_VALUES,
				Capability.NO_CLASS };

		Capabilities caps = super.getCapabilities();
		caps.disableAll();

		for (int c = 0; c < supported.length; c++) {
			caps.enable(supported[c]);
		}

		return caps;
	}

	/**
	 * Determines the output format based on the input format and returns this.
	 * In case the output format cannot be returned immediately, i.e.,
	 * immediateOutputFormat() returns false, then this method will be called
	 * from batchFinished().
	 * <p/>
	 * One numeric attribute is created per retained principal component. The
	 * components are visited from the end of m_SortedEigens backwards until
	 * either the cumulative share of the eigenvalue sum reaches the requested
	 * variance coverage or the maximum number of attributes is reached. Each
	 * attribute is named after the linear combination it represents; if the
	 * data has a class, a copy of the class attribute is appended last.
	 *
	 * @param inputFormat
	 *            the input format to base the output format on; its attribute
	 *            names are used to build the names of the new attributes
	 * @return the output format, or inputFormat itself if no eigenvalues have
	 *         been computed yet
	 * @throws Exception
	 *             in case the determination goes wrong
	 * @see #hasImmediateOutputFormat()
	 * @see #batchFinished()
	 */
	protected Instances determineOutputFormat(Instances inputFormat)
			throws Exception {
		double cumulative;
		FastVector attributes;
		int i;
		int j;
		StringBuilder attName;
		double[] coeff_mags;
		int num_attrs;
		int[] coeff_inds;
		int eigenIndex;
		double coeff_value;
		int numAttsLowerBound;

		// nothing has been analysed yet, so there is nothing to transform
		if (m_Eigenvalues == null)
			return inputFormat;

		// lowest position in m_SortedEigens that may still be retained
		if (m_MaxAttributes > 0)
			numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
		else
			numAttsLowerBound = 0;
		if (numAttsLowerBound < 0)
			numAttsLowerBound = 0;

		// number of terms spelled out in each attribute name (the same for
		// every component)
		num_attrs = (m_MaxAttrsInName > 0) ? Math.min(m_NumAttribs,
				m_MaxAttrsInName) : m_NumAttribs;

		cumulative = 0.0;
		attributes = new FastVector();
		for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
			eigenIndex = m_SortedEigens[i];

			// this array contains the indices of the coefficients in the order
			// in which they appear in the attribute name
			if (m_MaxAttrsInName > 0) {
				// only a limited number of terms is shown: order the
				// coefficients by decreasing magnitude (negated so that the
				// sort puts the largest magnitude first)
				coeff_mags = new double[m_NumAttribs];
				for (j = 0; j < m_NumAttribs; j++)
					coeff_mags[j] = -Math.abs(m_Eigenvectors[j][eigenIndex]);
				coeff_inds = Utils.sort(coeff_mags);
			} else {
				// all terms are shown: use the coefficients in original order
				coeff_inds = new int[m_NumAttribs];
				for (j = 0; j < m_NumAttribs; j++)
					coeff_inds[j] = j;
			}

			// build final attName string
			attName = new StringBuilder();
			for (j = 0; j < num_attrs; j++) {
				coeff_value = m_Eigenvectors[coeff_inds[j]][eigenIndex];
				// negative coefficients already carry their own sign
				if (j > 0 && coeff_value >= 0)
					attName.append("+");
				attName.append(Utils.doubleToString(coeff_value, 5, 3));
				attName.append(inputFormat.attribute(coeff_inds[j]).name());
			}
			if (num_attrs < m_NumAttribs)
				attName.append("...");

			attributes.addElement(new Attribute(attName.toString()));
			cumulative += m_Eigenvalues[eigenIndex];

			// enough variance covered: no further components are needed
			if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
				break;
		}

		if (m_HasClass)
			attributes.addElement(m_TrainCopy.classAttribute().copy());

		Instances outputFormat = new Instances(m_TrainCopy.relationName()
				+ "_principal components", attributes, 0);

		// set the class to be the last attribute if necessary
		if (m_HasClass)
			outputFormat.setClassIndex(outputFormat.numAttributes() - 1);

		m_OutputNumAtts = outputFormat.numAttributes();

		return outputFormat;
	}

	/**
	 * Fills m_Correlation with the matrix PCA is computed from. If the data
	 * is not to be centered this simply delegates to fillCorrelation().
	 * Otherwise the training data is centered with a Center filter (which is
	 * kept in m_centerFilter for converting instances later on) and the
	 * sample covariance matrix of the centered data is stored.
	 *
	 * @throws Exception
	 *             if the data cannot be centered or standardized
	 */
	protected void fillCovariance() throws Exception {

		if (!m_center) {
			fillCorrelation();
			return;
		}

		// now center the data by subtracting the mean
		m_centerFilter = new Center();
		m_centerFilter.setInputFormat(m_TrainInstances);
		m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

		// now compute the covariance matrix
		m_Correlation = new double[m_NumAttribs][m_NumAttribs];

		// sample covariance: divide by (n - 1)
		double divisor = (double) (m_TrainInstances.numInstances() - 1);

		for (int i = 0; i < m_NumAttribs; i++) {
			// the matrix is symmetric, so only the upper triangle is computed
			// and each value is mirrored into the lower triangle
			for (int j = i; j < m_NumAttribs; j++) {

				double cov = 0;
				for (int k = 0; k < m_NumInstances; k++) {
					Instance inst = m_TrainInstances.instance(k);
					// the data is centered, so the means are already removed
					cov += (inst.value(i) * inst.value(j));
				}

				cov /= divisor;
				m_Correlation[i][j] = cov;
				m_Correlation[j][i] = cov;
			}
		}
	}

	/**
	 * Computes the correlation matrix of the training data into
	 * m_Correlation and afterwards standardizes the training data. The
	 * Standardize filter is kept in m_standardizeFilter so that it can be
	 * applied to further instances.
	 *
	 * @throws Exception
	 *             if the data cannot be standardized
	 */
	protected void fillCorrelation() throws Exception {
		final int numAtts = m_NumAttribs;
		final int numInsts = m_NumInstances;

		m_Correlation = new double[numAtts][numAtts];
		double[] rowValues = new double[numInsts];
		double[] colValues = new double[numInsts];

		for (int row = 0; row < numAtts; row++) {
			// values of the "row" attribute, shared by all columns
			for (int n = 0; n < numInsts; n++) {
				rowValues[n] = m_TrainInstances.instance(n).value(row);
			}

			for (int col = 0; col < numAtts; col++) {
				if (row == col) {
					// an attribute is perfectly correlated with itself
					m_Correlation[row][col] = 1.0;
					continue;
				}

				for (int n = 0; n < numInsts; n++) {
					colValues[n] = m_TrainInstances.instance(n).value(col);
				}

				double r = Utils.correlation(rowValues, colValues, numInsts);
				m_Correlation[row][col] = r;
				m_Correlation[col][row] = r;
			}
		}

		// the correlations are taken from the raw data; only now is the
		// input data standardized
		m_standardizeFilter = new Standardize();
		m_standardizeFilter.setInputFormat(m_TrainInstances);
		m_TrainInstances = Filter.useFilter(m_TrainInstances,
				m_standardizeFilter);
	}

	/**
	 * Projects a single instance, given in the original (unnormalized)
	 * format, onto the retained principal components. The instance is first
	 * run through the same chain of filters that was applied to the training
	 * data (missing value replacement, nominal to binary conversion,
	 * attribute removal, standardizing or centering). The class value, if
	 * any, is copied unchanged into the last position.
	 *
	 * @param instance
	 *            an instance in the original (unnormalized) format
	 * @return a transformed instance
	 * @throws Exception
	 *             if instance can't be transformed
	 */
	protected Instance convertInstance(Instance instance) throws Exception {
		double[] transformed = new double[m_OutputNumAtts];
		Instance work = (Instance) instance.copy();

		m_ReplaceMissingFilter.input(work);
		m_ReplaceMissingFilter.batchFinished();
		work = m_ReplaceMissingFilter.output();

		m_NominalToBinaryFilter.input(work);
		m_NominalToBinaryFilter.batchFinished();
		work = m_NominalToBinaryFilter.output();

		// only present if attributes had to be removed during setup
		if (m_AttributeFilter != null) {
			m_AttributeFilter.input(work);
			m_AttributeFilter.batchFinished();
			work = m_AttributeFilter.output();
		}

		if (m_center) {
			m_centerFilter.input(work);
			m_centerFilter.batchFinished();
			work = m_centerFilter.output();
		} else {
			m_standardizeFilter.input(work);
			m_standardizeFilter.batchFinished();
			work = m_standardizeFilter.output();
		}

		// the class value is taken from the untouched original instance
		if (m_HasClass) {
			transformed[m_OutputNumAtts - 1] = instance.value(instance
					.classIndex());
		}

		// lowest position in m_SortedEigens that may still be retained
		int lowestRank = (m_MaxAttributes > 0) ? (m_NumAttribs - m_MaxAttributes)
				: 0;
		if (lowestRank < 0) {
			lowestRank = 0;
		}

		double covered = 0;
		for (int rank = m_NumAttribs - 1; rank >= lowestRank; rank--) {
			int eigenIndex = m_SortedEigens[rank];

			// dot product of the eigenvector with the preprocessed instance
			double projection = 0.0;
			for (int att = 0; att < m_NumAttribs; att++) {
				projection += m_Eigenvectors[att][eigenIndex] * work.value(att);
			}

			transformed[m_NumAttribs - rank - 1] = projection;
			covered += m_Eigenvalues[eigenIndex];

			// same stopping rule as used when building the output format
			if ((covered / m_SumOfEigenValues) >= m_CoverVariance) {
				break;
			}
		}

		// the result is sparse if and only if the input was sparse
		if (instance instanceof SparseInstance) {
			return new SparseInstance(instance.weight(), transformed);
		}
		return new Instance(instance.weight(), transformed);
	}

	/**
	 * Initializes the filter with the given input data: replaces missing
	 * values, converts nominal attributes to binary ones, removes the class
	 * attribute and all attributes with at most one distinct value, builds
	 * the correlation (or covariance) matrix, computes its eigenvalues and
	 * eigenvectors and finally determines and sets the output format.
	 * <p/>
	 * The internally used filters are kept so that convertInstance() can
	 * apply the same preprocessing to every instance. The working copy of
	 * the data is released at the end.
	 *
	 * @param instances
	 *            the data to process
	 * @throws Exception
	 *             in case the processing goes wrong
	 * @see #batchFinished()
	 */
	protected void setup(Instances instances) throws Exception {
		int i;
		int j;
		int classIndex;
		Vector<Integer> deleteCols;
		int[] todelete;
		Matrix corr;
		EigenvalueDecomposition eig;
		Matrix V;

		m_TrainInstances = new Instances(instances);

		// make a copy of the training data so that we can get the class
		// column to append to the transformed data (if necessary)
		m_TrainCopy = new Instances(m_TrainInstances, 0);

		m_ReplaceMissingFilter = new ReplaceMissingValues();
		m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
		m_TrainInstances = Filter.useFilter(m_TrainInstances,
				m_ReplaceMissingFilter);

		m_NominalToBinaryFilter = new NominalToBinary();
		m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
		m_TrainInstances = Filter.useFilter(m_TrainInstances,
				m_NominalToBinaryFilter);

		// always recompute the class information: it must not survive from
		// a previous run of this filter on data that had a class
		classIndex = m_TrainInstances.classIndex();
		m_HasClass = (classIndex >= 0);
		m_ClassIndex = classIndex;

		// delete any attributes with only one distinct value or are all
		// missing; the class column is handled separately below so that it
		// is never listed twice
		deleteCols = new Vector<Integer>();
		for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
			if (i != classIndex && m_TrainInstances.numDistinctValues(i) <= 1)
				deleteCols.addElement(i);
		}

		// get rid of the class column
		if (m_HasClass)
			deleteCols.addElement(classIndex);

		// remove columns from the data if necessary
		if (deleteCols.size() > 0) {
			m_AttributeFilter = new Remove();
			todelete = new int[deleteCols.size()];
			for (i = 0; i < todelete.length; i++)
				todelete[i] = deleteCols.elementAt(i).intValue();
			m_AttributeFilter.setAttributeIndicesArray(todelete);
			m_AttributeFilter.setInvertSelection(false);
			m_AttributeFilter.setInputFormat(m_TrainInstances);
			m_TrainInstances = Filter.useFilter(m_TrainInstances,
					m_AttributeFilter);
		}

		// can evaluator handle the processed data ? e.g., enough attributes?
		getCapabilities().testWithFail(m_TrainInstances);

		m_NumInstances = m_TrainInstances.numInstances();
		m_NumAttribs = m_TrainInstances.numAttributes();

		// fills m_Correlation with the correlation or covariance matrix and
		// standardizes or centers m_TrainInstances accordingly
		fillCovariance();

		// get eigen vectors/values
		corr = new Matrix(m_Correlation);
		eig = corr.eig();
		V = eig.getV();
		m_Eigenvectors = new double[m_NumAttribs][m_NumAttribs];
		for (i = 0; i < m_NumAttribs; i++) {
			for (j = 0; j < m_NumAttribs; j++)
				m_Eigenvectors[i][j] = V.get(i, j);
		}
		m_Eigenvalues = (double[]) eig.getRealEigenvalues().clone();

		// any eigenvalues less than 0 are not worth anything --- change to 0
		for (i = 0; i < m_Eigenvalues.length; i++) {
			if (m_Eigenvalues[i] < 0)
				m_Eigenvalues[i] = 0.0;
		}
		m_SortedEigens = Utils.sort(m_Eigenvalues);
		m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

		m_TransformedFormat = determineOutputFormat(m_TrainInstances);
		setOutputFormat(m_TransformedFormat);

		// the working copy is no longer needed
		m_TrainInstances = null;
	}

	/**
	 * Registers the structure of the instances to come and discards the
	 * results of any earlier analysis (eigenvalues, their sum, the number of
	 * output attributes, the attribute removal filter and the
	 * nominal-to-binary filter).
	 *
	 * @param instanceInfo
	 *            an Instances object containing the input instance structure
	 *            (any instances contained in the object are ignored - only the
	 *            structure is required).
	 * @return always false: the output format is only available after the
	 *         first batch has been processed
	 * @throws Exception
	 *             if the input format can't be set successfully
	 */
	public boolean setInputFormat(Instances instanceInfo) throws Exception {
		super.setInputFormat(instanceInfo);

		// helper filters built for the previous input format
		m_NominalToBinaryFilter = null;
		m_AttributeFilter = null;

		// results of the previous analysis
		m_Eigenvalues = null;
		m_SumOfEigenValues = 0.0;
		m_OutputNumAtts = -1;

		final boolean outputFormatAvailable = false;
		return outputFormatAvailable;
	}

	/**
	 * Feeds one instance into the filter. Until the first batch has been
	 * finished the instance is only buffered, because all training instances
	 * are needed before any output can be produced. Afterwards the instance
	 * is transformed and queued for output right away.
	 *
	 * @param instance
	 *            the input instance
	 * @return true if the filtered instance may now be collected with output().
	 * @throws IllegalStateException
	 *             if no input format has been set
	 * @throws Exception
	 *             if conversion fails
	 */
	public boolean input(Instance instance) throws Exception {
		if (getInputFormat() == null) {
			throw new IllegalStateException("No input instance format defined");
		}

		if (isNewBatch()) {
			resetQueue();
			m_NewBatch = false;
		}

		// still collecting the first batch: nothing can be output yet
		if (!isFirstBatchDone()) {
			bufferInput(instance);
			return false;
		}

		Instance converted = convertInstance(instance);
		converted.setDataset(getOutputFormat());
		push(converted);
		return true;
	}

	/**
	 * Signals the end of the current batch. On the first batch the filter is
	 * set up from the buffered instances. All buffered instances are then
	 * transformed and queued for output, and the input buffer is flushed.
	 *
	 * @return true if there are instances pending output
	 * @throws NullPointerException
	 *             if no input structure has been defined,
	 * @throws Exception
	 *             if there was a problem finishing the batch.
	 */
	public boolean batchFinished() throws Exception {
		Instances buffered = getInputFormat();
		if (buffered == null) {
			throw new NullPointerException("No input instance format defined");
		}

		if (!isFirstBatchDone()) {
			setup(buffered);
		}

		int index = 0;
		while (index < buffered.numInstances()) {
			Instance converted = convertInstance(buffered.instance(index));
			converted.setDataset(getOutputFormat());
			push(converted);
			index++;
		}

		flushInput();
		m_NewBatch = true;
		m_FirstBatchDone = true;

		boolean outputPending = (numPendingOutput() != 0);
		return outputPending;
	}

	/**
	 * Extracts the revision of this class from its revision keyword.
	 *
	 * @return the revision
	 */
	public String getRevision() {
		final String keyword = "$Revision: 6690 $";
		return RevisionUtils.extract(keyword);
	}

	/**
	 * Runs this filter from the command line.
	 *
	 * @param args
	 *            should contain arguments to the filter: use -h for help
	 */
	public static void main(String[] args) {
		PrincipalComponents filter = new PrincipalComponents();
		runFilter(filter, args);
	}
}
